from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
import time
from selenium import webdriver
import undetected_chromedriver as uc
import pymysql
import random

# Crawler finished; the scraped results can be viewed in the database.

# Chrome configuration. The browser is pointed at an existing user-data
# directory on the local machine.
options = webdriver.ChromeOptions()
options.add_argument(r"--user-data-dir=C:\Users\12036\AppData\Local\Google\Chrome\User Data")
# Optional switches (currently disabled):
#   '--headless' and '--disable-gpu'      -> run without a visible window
#   r'--profile-directory=Default'        -> pick a profile, e.g. "Profile 3"

# Exclude the "enable-automation" switch and turn off the automation
# extension (presumably so the session looks less like an automated browser).
for option_name, option_value in (
    ("excludeSwitches", ["enable-automation"]),
    ('useAutomationExtension', False),
):
    options.add_experimental_option(option_name, option_value)

browser = webdriver.Chrome(options=options)
# Explicit-wait helper bound to the browser, with a 10 second timeout.
wait = WebDriverWait(browser, timeout=10)


# Phase 1: walk the paginated author index and collect the URL of every
# poet's detail page into detail_url_list.
detail_url_list = []
url = "http://www.gudianmingzhu.com/zuozhe/index.html"
# NOTE(review): the original comment said there are 722 pages, but
# range(1, 722) stops at page 721 -- confirm whether index_722.html exists.
for page in range(1, 722):
    # Page 1 is index.html; every later page is index_<page>.html.
    if page != 1:
        url = f"http://www.gudianmingzhu.com/zuozhe/index_{page}.html"
    try:
        browser.get(url)
        div_sons = browser.find_elements(By.XPATH, ".//body/div[3]/div[2]/div")
        # The first and the last child <div> are skipped; only the ones in
        # between are read for a poet link.
        for entry in div_sons[1:-1]:
            link = entry.find_element(By.XPATH, ".//h2/a")
            detail_url_list.append(link.get_attribute("href").strip())
    except Exception as exc:
        # Best effort: a broken page must not abort the whole crawl, but the
        # failure is reported instead of being silently discarded.
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        print(f"Skipping index page {page} ({url}): {exc!r}")
    # Pause 3-5 seconds between requests, also after a failed page, so the
    # site is never hit back-to-back.
    time.sleep(random.randint(3, 5))

# Phase 2: visit every detail page and build one [name, image_url, summary]
# record per poet. Records are appended in the same order as
# detail_url_list, because the insert step pairs the two lists by position.
data_list = []
for detail_url in detail_url_list:
    browser.get(detail_url)
    name = browser.find_element(By.XPATH, './/body/div[3]/div[2]/div[1]/div[1]/h2').text
    try:
        # Layout with a portrait: image in div[1], summary text in div[2].
        image = browser.find_element(
            By.XPATH, './/body/div[3]/div[2]/div[1]/div[1]/div[1]/img'
        ).get_attribute('src')
        summary = browser.find_element(
            By.XPATH, './/body/div[3]/div[2]/div[1]/div[1]/div[2]'
        ).text
    except Exception:
        # Layout without a portrait: no image URL, summary lives elsewhere.
        # Both fields are reset together so a half-finished lookup above can
        # never leave an extra element in the record.
        image = ''
        summary = str(browser.find_element(By.XPATH, '//*[@id="leftdg"]/div[1]/div/div').text)
    # Always exactly three fields, appended once.
    data_list.append([name, image, summary])

#print(data_list)

# Phase 3: rebuild the `poetry` table and store one row per crawled poet,
# then shut the browser down.
dbpath = "poetry_information.db"  # not referenced anywhere in the visible script

# Table layout. `code` is meant to hold the poet's region code and is filled
# in by a later step; it stays NULL here.
sql = '''
    create table poetry
    (
    id INT AUTO_INCREMENT PRIMARY KEY,
    poetry_name TEXT,
    image TEXT,
    summary TEXT,
    url TEXT,
    code TEXT
    ) 
'''
sql1 = '''
    INSERT INTO poetry(
    poetry_name,image,summary,url)
    VALUES(%s,%s,%s,%s)'''

try:
    # NOTE(review): MySQL's 'utf8' is the 3-byte utf8mb3 charset; rare CJK
    # characters outside the BMP would need utf8mb4 -- confirm with the schema.
    db = pymysql.connect(host='127.0.0.1', port=3306, user='root', db='poetry_information', password='root', charset='utf8')
    try:
        with db.cursor() as cursor:
            # The table is dropped and recreated on every run.
            cursor.execute("DROP TABLE IF EXISTS poetry")
            cursor.execute(sql)

            # Pair each record with its detail URL by position. zip() walks
            # every record, so the last one is no longer left out (the old
            # range(0, len(data_list) - 1) loop stopped one short).
            rows = [
                (record[0], record[1], record[2], detail_url)
                for record, detail_url in zip(data_list, detail_url_list)
            ]
            cursor.executemany(sql1, rows)
        db.commit()
    finally:
        # Release the connection even if a statement above failed.
        db.close()
finally:
    # Quit the browser, whether or not the database step succeeded.
    browser.quit()

